# to ensure that the current gpu utilization is 0
# memory footprint support libraries/code
!ln -sf /opt/bin/nvidia-smi /usr/bin/nvidia-smi
!pip install gputil
!pip install psutil
!pip install humanize
import psutil
import humanize
import os
import GPUtil as GPU
# query every GPU visible to the NVIDIA driver
GPUs = GPU.getGPUs()
# XXX: only one GPU on Colab and isn’t guaranteed
gpu = GPUs[0]  # NOTE(review): raises IndexError when no GPU is attached — confirm a GPU runtime is selected
def printm():
    """Print the host's free RAM, this process's resident size, and the GPU memory stats."""
    process = psutil.Process(os.getpid())
    print("Gen RAM Free: " + humanize.naturalsize( psutil.virtual_memory().available ), " | Proc size: " + humanize.naturalsize( process.memory_info().rss))
    print("GPU RAM Free: {0:.0f}MB | Used: {1:.0f}MB | Util {2:3.0f}% | Total {3:.0f}MB".format(gpu.memoryFree, gpu.memoryUsed, gpu.memoryUtil*100, gpu.memoryTotal))
printm()
Collecting gputil Downloading https://files.pythonhosted.org/packages/ed/0e/5c61eedde9f6c87713e89d794f01e378cfd9565847d4576fa627d758c554/GPUtil-1.4.0.tar.gz Building wheels for collected packages: gputil Building wheel for gputil (setup.py) ... done Created wheel for gputil: filename=GPUtil-1.4.0-cp36-none-any.whl size=7411 sha256=00b5ac2f641684dd09dfa7cdf45648d318e873018708b70db1f12d46e21e8a05 Stored in directory: /root/.cache/pip/wheels/3d/77/07/80562de4bb0786e5ea186911a2c831fdd0018bda69beab71fd Successfully built gputil Installing collected packages: gputil Successfully installed gputil-1.4.0 Requirement already satisfied: psutil in /usr/local/lib/python3.6/dist-packages (5.4.8) Requirement already satisfied: humanize in /usr/local/lib/python3.6/dist-packages (0.5.1) Gen RAM Free: 26.4 GB | Proc size: 111.8 MB GPU RAM Free: 16280MB | Used: 0MB | Util 0% | Total 16280MB
Please do not use this step unless the cluster you are allocated to is quite full.
## if utilization is > 0, run this code(keep running this cell and the above cell till the util number is 0%):
## NOTE THAT RUNNING THIS MIGHT KILL GPU SESSION AND RESULT IN DATA LOSS (NOT ADVISABLE TO KEEP ON REUSING)
# !kill -9 -1
This entire section can be omitted if users are not utilizing GPU at all.
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())
[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 1323454165332263671
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 15701340352
locality {
bus_id: 1
links {
}
}
incarnation: 16760273811098444455
physical_device_desc: "device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0"
]
!nvidia-smi
Fri Feb 19 12:57:58 2021
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03 Driver Version: 460.32.03 CUDA Version: 11.2 |
|-------------------------------+----------------------+----------------------+
| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |
| | | MIG M. |
|===============================+======================+======================|
| 0 Tesla P100-PCIE... Off | 00000000:00:04.0 Off | 0 |
| N/A 45C P0 34W / 250W | 351MiB / 16280MiB | 0% Default |
| | | N/A |
+-------------------------------+----------------------+----------------------+
+-----------------------------------------------------------------------------+
| Processes: |
| GPU GI CI PID Type Process name GPU Memory |
| ID ID Usage |
|=============================================================================|
+-----------------------------------------------------------------------------+
This is to clone the entire repo from github. Please do not execute this unless you are running in a different session of google colab and that the previous dataset has been wiped off. It might take a while.
# this is from zhan kang repository where he uploaded the dataset to instead of from google drive.
!git clone https://github.com/ngzhankang/Deep-Learning_ca2.git
Cloning into 'Deep-Learning_ca2'... remote: Enumerating objects: 54, done. remote: Counting objects: 100% (54/54), done. remote: Compressing objects: 100% (53/53), done. remote: Total 202564 (delta 11), reused 4 (delta 1), pack-reused 202510 Receiving objects: 100% (202564/202564), 1.30 GiB | 38.14 MiB/s, done. Resolving deltas: 100% (19/19), done. Checking out files: 100% (202605/202605), done.
# just in case sample_data is in our working directory, get rid of it to also save space lol
!rm -rf sample_data
We do this to check that we are doing in the right directory before proceeding.
# List current directory
!ls
Deep-Learning_ca2
img_align_celeba: All the face images, cropped and aligned. list_eval_partition.csv: Recommended partitioning of images into training, validation, testing sets. Images 1-162770 are training, 162771-182637 are validation, 182638-202599 are testing. list_bbox_celeba.csv: Bounding box information for each image. "x_1" and "y_1" represent the upper-left corner coordinate of the bounding box; "width" and "height" represent the width and height of the bounding box. list_landmarks_align_celeba.csv: Image landmarks and their respective coordinates. There are 5 landmarks: left eye, right eye, nose, left mouth, right mouth. list_attr_celeba.csv: Attribute labels for each image. There are 40 attributes. "1" represents positive while "-1" represents negative.
# suppress future warnings
import warnings
warnings.filterwarnings('ignore')
# check versions of libraries we are going to use
%tensorflow_version 2.x
import os
import tensorflow
import sklearn
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib
import platform
# banner sized to the header text
message=" Versions "
print("*"*len(message))
print(message)
print("*"*len(message))
# record every library version next to the results so runs are reproducible
print("Tensorflow version={}".format(tensorflow.__version__))
print("Keras version={}".format(tensorflow.keras.__version__))
print("Sklearn version={}".format(sklearn.__version__))
print("Numpy version={}".format(np.__version__))
print("Pandas version={}".format(pd.__version__))
print("Seaborn version={}".format(sns.__version__))
print("Matplotlib version={}".format(matplotlib.__version__))
print("Python version={}".format(platform.python_version()))
************************
Versions
************************
Tensorflow version=2.4.1
Keras version=2.4.0
Sklearn version=0.22.2.post1
Numpy version=1.19.5
Pandas version=1.1.5
Seaborn version=0.11.1
Matplotlib version=3.2.2
Python version=3.6.9
# importing necessary libraries for this project
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.initializers import RandomNormal
from tensorflow.keras.layers import Input, Conv2D, Conv2DTranspose
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LeakyReLU, Flatten
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Reshape, Dropout
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.models import Model, Sequential, load_model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import BinaryCrossentropy
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from numpy.random import random
import time
import os
# we will need the InceptionV3 model for FID evaluation calculation
from tensorflow.keras.applications.inception_v3 import InceptionV3, preprocess_input
from scipy.linalg import sqrtm
from skimage.transform import resize
from numpy import iscomplexobj, cov, trace
from tqdm import tqdm
from numpy import asarray
# for adding support to different image file type
from PIL import Image
# for processing images
import skimage.transform
from skimage import data, io
# For displaying image
from IPython import display
%matplotlib inline
# get the links of the dataset that are stored in the github repo
main_dir_url = 'Deep-Learning_ca2/dataset/'
images_folder_url = main_dir_url + 'img_align_celeba/'
list_attr_celeba_csv_url = main_dir_url + 'list_attr_celeba.csv'
list_bbox_celeba_csv_url = main_dir_url + 'list_bbox_celeba.csv'
list_eval_partition_csv_url = main_dir_url + 'list_eval_partition.csv'
list_landmarks_align_celeba_csv_url = main_dir_url + 'list_landmarks_align_celeba.csv'
# now using the links we load into panda dataframes
list_attr_celeba_csv = pd.read_csv(list_attr_celeba_csv_url, delimiter=',')
list_bbox_celeba_csv = pd.read_csv(list_bbox_celeba_csv_url, delimiter=',')
list_eval_partition_csv = pd.read_csv(list_eval_partition_csv_url, delimiter=',')
list_landmarks_align_celeba_csv = pd.read_csv(list_landmarks_align_celeba_csv_url, delimiter=',')
# give names to each pandas dataframe
list_attr_celeba_csv.dataframeName = 'list_attr_celeba.csv'
list_bbox_celeba_csv.dataframeName = 'list_bbox_celeba.csv'
list_eval_partition_csv.dataframeName = 'list_eval_partition.csv'
list_landmarks_align_celeba_csv.dataframeName = 'list_landmarks_align_celeba.csv'
hms_string function for time recording purposes¶We create the hms_string function for time recording purposes later on when we train our models, so that we can see the time elapsed.
# nicely formatted time string
def hms_string(sec_elapsed):
    """Format an elapsed time in seconds as 'H:MM:SS.ss'."""
    hours, remainder = divmod(sec_elapsed, 60 * 60)
    minutes, seconds = divmod(remainder, 60)
    return "{}:{:>02}:{:>05.2f}".format(int(hours), int(minutes), seconds)
Take a peek look at what is inside the respective dataframes first before we do something to it.
# list all the availabe csv files and data in our dataset folder
print(os.listdir(main_dir_url))
['list_eval_partition.csv', 'img_align_celeba', 'list_landmarks_align_celeba.csv', 'list_attr_celeba.csv', 'list_bbox_celeba.csv']
# get 5 results from list_attr_celeba.csv (1 means positive and -1 means negative)
list_attr_celeba_csv.head(5)
| image_id | 5_o_Clock_Shadow | Arched_Eyebrows | Attractive | Bags_Under_Eyes | Bald | Bangs | Big_Lips | Big_Nose | Black_Hair | Blond_Hair | Blurry | Brown_Hair | Bushy_Eyebrows | Chubby | Double_Chin | Eyeglasses | Goatee | Gray_Hair | Heavy_Makeup | High_Cheekbones | Male | Mouth_Slightly_Open | Mustache | Narrow_Eyes | No_Beard | Oval_Face | Pale_Skin | Pointy_Nose | Receding_Hairline | Rosy_Cheeks | Sideburns | Smiling | Straight_Hair | Wavy_Hair | Wearing_Earrings | Wearing_Hat | Wearing_Lipstick | Wearing_Necklace | Wearing_Necktie | Young | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 000001.jpg | -1 | 1 | 1 | -1 | -1 | -1 | -1 | -1 | -1 | -1 | -1 | 1 | -1 | -1 | -1 | -1 | -1 | -1 | 1 | 1 | -1 | 1 | -1 | -1 | 1 | -1 | -1 | 1 | -1 | -1 | -1 | 1 | 1 | -1 | 1 | -1 | 1 | -1 | -1 | 1 |
| 1 | 000002.jpg | -1 | -1 | -1 | 1 | -1 | -1 | -1 | 1 | -1 | -1 | -1 | 1 | -1 | -1 | -1 | -1 | -1 | -1 | -1 | 1 | -1 | 1 | -1 | -1 | 1 | -1 | -1 | -1 | -1 | -1 | -1 | 1 | -1 | -1 | -1 | -1 | -1 | -1 | -1 | 1 |
| 2 | 000003.jpg | -1 | -1 | -1 | -1 | -1 | -1 | 1 | -1 | -1 | -1 | 1 | -1 | -1 | -1 | -1 | -1 | -1 | -1 | -1 | -1 | 1 | -1 | -1 | 1 | 1 | -1 | -1 | 1 | -1 | -1 | -1 | -1 | -1 | 1 | -1 | -1 | -1 | -1 | -1 | 1 |
| 3 | 000004.jpg | -1 | -1 | 1 | -1 | -1 | -1 | -1 | -1 | -1 | -1 | -1 | -1 | -1 | -1 | -1 | -1 | -1 | -1 | -1 | -1 | -1 | -1 | -1 | -1 | 1 | -1 | -1 | 1 | -1 | -1 | -1 | -1 | 1 | -1 | 1 | -1 | 1 | 1 | -1 | 1 |
| 4 | 000005.jpg | -1 | 1 | 1 | -1 | -1 | -1 | 1 | -1 | -1 | -1 | -1 | -1 | -1 | -1 | -1 | -1 | -1 | -1 | 1 | -1 | -1 | -1 | -1 | 1 | 1 | -1 | -1 | 1 | -1 | -1 | -1 | -1 | -1 | -1 | -1 | -1 | 1 | -1 | -1 | 1 |
# replace -1 (negative) to 0
list_attr_celeba_csv.replace(to_replace=-1, value=0, inplace=True)
# describe list_attr_celeba.csv
list_attr_celeba_csv.describe()
| 5_o_Clock_Shadow | Arched_Eyebrows | Attractive | Bags_Under_Eyes | Bald | Bangs | Big_Lips | Big_Nose | Black_Hair | Blond_Hair | Blurry | Brown_Hair | Bushy_Eyebrows | Chubby | Double_Chin | Eyeglasses | Goatee | Gray_Hair | Heavy_Makeup | High_Cheekbones | Male | Mouth_Slightly_Open | Mustache | Narrow_Eyes | No_Beard | Oval_Face | Pale_Skin | Pointy_Nose | Receding_Hairline | Rosy_Cheeks | Sideburns | Smiling | Straight_Hair | Wavy_Hair | Wearing_Earrings | Wearing_Hat | Wearing_Lipstick | Wearing_Necklace | Wearing_Necktie | Young | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 202599.000000 | 202599.000000 | 202599.000000 | 202599.000000 | 202599.000000 | 202599.000000 | 202599.000000 | 202599.000000 | 202599.000000 | 202599.000000 | 202599.000000 | 202599.000000 | 202599.000000 | 202599.000000 | 202599.000000 | 202599.000000 | 202599.000000 | 202599.000000 | 202599.000000 | 202599.000000 | 202599.000000 | 202599.000000 | 202599.000000 | 202599.000000 | 202599.000000 | 202599.000000 | 202599.000000 | 202599.000000 | 202599.000000 | 202599.000000 | 202599.000000 | 202599.00000 | 202599.000000 | 202599.000000 | 202599.000000 | 202599.000000 | 202599.000000 | 202599.000000 | 202599.000000 | 202599.000000 |
| mean | 0.111136 | 0.266981 | 0.512505 | 0.204572 | 0.022443 | 0.151575 | 0.240796 | 0.234532 | 0.239251 | 0.147992 | 0.050899 | 0.205194 | 0.142168 | 0.057567 | 0.046688 | 0.065119 | 0.062764 | 0.041950 | 0.386922 | 0.455032 | 0.416754 | 0.483428 | 0.041545 | 0.115149 | 0.834940 | 0.284143 | 0.042947 | 0.277445 | 0.079778 | 0.065721 | 0.056511 | 0.48208 | 0.208402 | 0.319567 | 0.188925 | 0.048460 | 0.472436 | 0.122967 | 0.072715 | 0.773617 |
| std | 0.314301 | 0.442383 | 0.499845 | 0.403389 | 0.148121 | 0.358610 | 0.427568 | 0.423707 | 0.426627 | 0.355093 | 0.219791 | 0.403844 | 0.349223 | 0.232923 | 0.210971 | 0.246736 | 0.242539 | 0.200475 | 0.487047 | 0.497975 | 0.493023 | 0.499727 | 0.199548 | 0.319202 | 0.371236 | 0.451006 | 0.202738 | 0.447739 | 0.270950 | 0.247794 | 0.230906 | 0.49968 | 0.406167 | 0.466310 | 0.391450 | 0.214737 | 0.499241 | 0.328400 | 0.259669 | 0.418491 |
| min | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| 25% | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 1.000000 |
| 50% | 0.000000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 1.000000 |
| 75% | 0.000000 | 1.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 0.000000 | 0.000000 | 1.000000 | 1.000000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 1.00000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | 1.000000 |
| max | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.00000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 |
# look for correlation between labels using heatmap
f, ax = plt.subplots(figsize=(30, 30))
corr = list_attr_celeba_csv.iloc[:,0:].corr()
hm = sns.heatmap(round(corr,2), annot=True, ax=ax, cmap="coolwarm",fmt='.2f', linewidths=.05)
f.subplots_adjust(top=0.93)
t= f.suptitle('CelebA Attributes Correlation', fontsize=16, x=0.45)
# get 5 results from list_bbox_celeba.csv (the bounding box of the image)
list_bbox_celeba_csv.head(5)
| image_id | x_1 | y_1 | width | height | |
|---|---|---|---|---|---|
| 0 | 000001.jpg | 95 | 71 | 226 | 313 |
| 1 | 000002.jpg | 72 | 94 | 221 | 306 |
| 2 | 000003.jpg | 216 | 59 | 91 | 126 |
| 3 | 000004.jpg | 622 | 257 | 564 | 781 |
| 4 | 000005.jpg | 236 | 109 | 120 | 166 |
# get 5 results from list_landmarks_align_celeba.csv (landmarks and respective coords of the celeb faces)
list_landmarks_align_celeba_csv.head(5)
| image_id | lefteye_x | lefteye_y | righteye_x | righteye_y | nose_x | nose_y | leftmouth_x | leftmouth_y | rightmouth_x | rightmouth_y | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 000001.jpg | 69 | 109 | 106 | 113 | 77 | 142 | 73 | 152 | 108 | 154 |
| 1 | 000002.jpg | 69 | 110 | 107 | 112 | 81 | 135 | 70 | 151 | 108 | 153 |
| 2 | 000003.jpg | 76 | 112 | 104 | 106 | 108 | 128 | 74 | 156 | 98 | 158 |
| 3 | 000004.jpg | 72 | 113 | 108 | 108 | 101 | 138 | 71 | 155 | 101 | 151 |
| 4 | 000005.jpg | 66 | 114 | 112 | 112 | 86 | 119 | 71 | 147 | 104 | 150 |
# get the 5 results from list_eval_partition.csv\
list_eval_partition_csv.head(5)
| image_id | partition | |
|---|---|---|
| 0 | 000001.jpg | 0 |
| 1 | 000002.jpg | 0 |
| 2 | 000003.jpg | 0 |
| 3 | 000004.jpg | 0 |
| 4 | 000005.jpg | 0 |
# get the facial attributes provided by the dataset
for i, j in enumerate(list_attr_celeba_csv.columns):
print(i, j)
0 image_id 1 5_o_Clock_Shadow 2 Arched_Eyebrows 3 Attractive 4 Bags_Under_Eyes 5 Bald 6 Bangs 7 Big_Lips 8 Big_Nose 9 Black_Hair 10 Blond_Hair 11 Blurry 12 Brown_Hair 13 Bushy_Eyebrows 14 Chubby 15 Double_Chin 16 Eyeglasses 17 Goatee 18 Gray_Hair 19 Heavy_Makeup 20 High_Cheekbones 21 Male 22 Mouth_Slightly_Open 23 Mustache 24 Narrow_Eyes 25 No_Beard 26 Oval_Face 27 Pale_Skin 28 Pointy_Nose 29 Receding_Hairline 30 Rosy_Cheeks 31 Sideburns 32 Smiling 33 Straight_Hair 34 Wavy_Hair 35 Wearing_Earrings 36 Wearing_Hat 37 Wearing_Lipstick 38 Wearing_Necklace 39 Wearing_Necktie 40 Young
We check if there is any missing values in our attribute dataframe before proceeding on.
# get count of null values in attribute dataframe
list_attr_celeba_csv.isnull().sum()
image_id 0 5_o_Clock_Shadow 0 Arched_Eyebrows 0 Attractive 0 Bags_Under_Eyes 0 Bald 0 Bangs 0 Big_Lips 0 Big_Nose 0 Black_Hair 0 Blond_Hair 0 Blurry 0 Brown_Hair 0 Bushy_Eyebrows 0 Chubby 0 Double_Chin 0 Eyeglasses 0 Goatee 0 Gray_Hair 0 Heavy_Makeup 0 High_Cheekbones 0 Male 0 Mouth_Slightly_Open 0 Mustache 0 Narrow_Eyes 0 No_Beard 0 Oval_Face 0 Pale_Skin 0 Pointy_Nose 0 Receding_Hairline 0 Rosy_Cheeks 0 Sideburns 0 Smiling 0 Straight_Hair 0 Wavy_Hair 0 Wearing_Earrings 0 Wearing_Hat 0 Wearing_Lipstick 0 Wearing_Necklace 0 Wearing_Necktie 0 Young 0 dtype: int64
# see the number of rows and columns for each dataframe
nRow1, nCol1 = list_attr_celeba_csv.shape
nRow2, nCol2 = list_bbox_celeba_csv.shape
nRow3, nCol3 = list_eval_partition_csv.shape
nRow4, nCol4 = list_landmarks_align_celeba_csv.shape
# print number of rows and columns for each dataframe
print(f'There are {nRow1} rows and {nCol1} columns for {list_attr_celeba_csv.dataframeName}')
print(f'There are {nRow2} rows and {nCol2} columns for {list_bbox_celeba_csv.dataframeName}')
print(f'There are {nRow3} rows and {nCol3} columns for {list_eval_partition_csv.dataframeName}')
print(f'There are {nRow4} rows and {nCol4} columns for {list_landmarks_align_celeba_csv.dataframeName}')
There are 202599 rows and 41 columns for list_attr_celeba.csv There are 202599 rows and 5 columns for list_bbox_celeba.csv There are 202599 rows and 2 columns for list_eval_partition.csv There are 202599 rows and 11 columns for list_landmarks_align_celeba.csv
# plot out a face of a celebrity to see how he/she looks like
# get a random sample picture from the dataset
EXAMPLE_PIC = images_folder_url + '000800.jpg'
img = load_img(EXAMPLE_PIC)
plt.axis('off')
plt.imshow(img)
list_attr_celeba_csv.loc[list_attr_celeba_csv['image_id'] == EXAMPLE_PIC.split('/')[-1], ['Smiling', 'Male', 'Young']]
| Smiling | Male | Young | |
|---|---|---|---|
| 799 | 1 | 0 | 0 |
# get the image size (y, x)
img.size
(178, 218)
# see the distribution of people that are bald/not bald
plt.title("Bald/not bald Countplot")
sns.countplot(list_attr_celeba_csv["Bald"])
plt.show()
# see the distribution of people with open/closed mouths
plt.title("Open/closed mouth Countplot")
sns.countplot(list_attr_celeba_csv["Mouth_Slightly_Open"])
plt.show()
# see the distribution of people with mustache/no mustache
plt.title("Mustache/no mustache Countplot")
sns.countplot(list_attr_celeba_csv["Mustache"])
plt.show()
# see the gender distribution
plt.title('Gender Countplot')
sns.countplot(list_attr_celeba_csv["Male"])
plt.show()
We will now proceed to keep only the images of celebrities that are bald, have their mouth slightly opened, have a mustache, and are male, so that we can train our generator and discriminator on this specific group of celebrities. This reduces the training set size we will be using later on and makes the generated images look clearer.
# select only the respective attribute columns that we want to filter by,
# then keep rows where every one of the four flags is positive (marked as 1)
wanted_cols = ['image_id', 'Bald', 'Mustache', 'Male', 'Smiling']
train_dataset = list_attr_celeba_csv.loc[:, list_attr_celeba_csv.columns.intersection(wanted_cols)]
for flag in ('Bald', 'Mustache', 'Male', 'Smiling'):
    # each pass narrows the frame to rows whose flag equals 1
    train_dataset = train_dataset[train_dataset[flag] == 1]
# see the current dataframe first
train_dataset.head(5)
| image_id | Bald | Male | Mustache | Smiling | |
|---|---|---|---|---|---|
| 208 | 000209.jpg | 1 | 1 | 1 | 1 |
| 2358 | 002359.jpg | 1 | 1 | 1 | 1 |
| 3643 | 003644.jpg | 1 | 1 | 1 | 1 |
| 3980 | 003981.jpg | 1 | 1 | 1 | 1 |
| 4381 | 004382.jpg | 1 | 1 | 1 | 1 |
# re-check again to see if there is any values with 0(which means negative)
train_dataset.isnull().values.any()
False
# now we shall check if they are partitioned correctly
print(train_dataset.value_counts())
image_id Bald Male Mustache Smiling
202561.jpg 1 1 1 1 1
051525.jpg 1 1 1 1 1
063444.jpg 1 1 1 1 1
064107.jpg 1 1 1 1 1
066000.jpg 1 1 1 1 1
..
139104.jpg 1 1 1 1 1
139968.jpg 1 1 1 1 1
140646.jpg 1 1 1 1 1
141234.jpg 1 1 1 1 1
000209.jpg 1 1 1 1 1
Length: 322, dtype: int64
# take the values from image_id and convert to numpy array so that we can read from these files
# we will take a look at the first 5 only as it is a long list
images_array = train_dataset[['image_id']].to_numpy()
images_array[:5]
array([['000209.jpg'],
['002359.jpg'],
['003644.jpg'],
['003981.jpg'],
['004382.jpg']], dtype=object)
# flatten the image ids into a 1-D array with a string dtype so it can fit into tensorflow later on.
# np.unicode was deprecated in NumPy 1.20 and removed in 1.24; the builtin str dtype is the
# supported equivalent and produces the same unicode array.
files = np.array([row[0] for row in images_array], dtype=str)
# see 30 random images from the new filtered dataset
plt.figure(figsize=(20,10))
for i, name in enumerate(files[:30]):
plt.subplot(3,10,i+1)
img = plt.imread(images_folder_url + '/' + name)
plt.imshow(img)
plt.title(name)
plt.axis('off')
plt.tight_layout()
# get the max pix value from an image. We will take image 000800.jpg
sample_image = Image.open(EXAMPLE_PIC)
pixels = np.asarray(sample_image)
print('Min px: %.3f, Max px: %.3f' % (pixels.min(), pixels.max()))
Min px: 0.000, Max px: 255.000
# do 1 last check to see how many images we have now
print('Total images: ', len(files))
Total images: 322
Normalize the training/test images by dividing by 255. We also need to reshape the images to reduce the computation power required.
# fit all the .jpg files into numpy array and normalize them in the process. this is because we are going to use tanh as the activation function for our generator
# using the 80%/20% training and test data split, we will split our total images into that partition
# so we will use around 200 images for train and 40 images for testing
# we will then do data augmentation later on so that we can get more images
NTRAIN = 200
NTEST = 40
nm_imgs = files
# name of the jpg files for the training dataset (first NTRAIN images)
nm_imgs_train = nm_imgs[:NTRAIN]
# name of the jpg files for the test dataset — take the last NTEST images.
# (Previously a hard-coded nm_imgs[258:], which ignored NTEST and yielded 64
# test images instead of the 40 stated above.)
nm_imgs_test = nm_imgs[-NTEST:]
# shrink the image here as GAN requires lots of computational time(we have to as google colab always crashes)
img_shape = (128, 128, 3)
# create a function to normalize images and shrink the images
def get_npdata(nm_imgs_train):
    """Load the named jpgs, shrink them to img_shape, and scale pixels to [-1, 1].

    The [-1, 1] range matches the tanh activation on the generator's output layer.
    """
    samples = []
    for fname in nm_imgs_train:
        img = load_img(images_folder_url + '/' + fname, target_size=img_shape[:2])
        pixels = img_to_array(img) / 255.0
        # shift [0, 1] to [-1, 1] to match the tanh output of the generator
        samples.append(pixels * 2 - 1)
    return np.array(samples)
X_train = get_npdata(nm_imgs_train)
X_test = get_npdata(nm_imgs_test)
# get the shape of the current training dataset to check that it is correctly formatted.
print("X_train.shape = {}".format(X_train.shape))
print("X_test.shape = {}".format(X_test.shape))
X_train.shape = (200, 128, 128, 3) X_test.shape = (64, 128, 128, 3)
# now we will plot out the reshaped image from the training dataset to see what's the difference after normalizing.
fig = plt.figure(figsize=(20,10))
nplot = 7
for count in range(1,nplot):
    ax = fig.add_subplot(1,nplot,count)
    # the images were scaled to [-1, 1] for tanh; map them back to [0, 1] so
    # imshow renders them correctly instead of clipping (this removes the
    # "Clipping input data to the valid range" warnings)
    ax.imshow((X_train[count] + 1) / 2)
    ax.axis('off')
plt.show()
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers). Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers). Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers). Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers). Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers). Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
# now we will plot out the reshaped image from the test dataset to see what's the difference after normalizing.
fig = plt.figure(figsize=(20,10))
nplot = 7
for count in range(1,nplot):
    ax = fig.add_subplot(1,nplot,count)
    # the images were scaled to [-1, 1] for tanh; map them back to [0, 1] so
    # imshow renders them correctly instead of clipping (this removes the
    # "Clipping input data to the valid range" warnings)
    ax.imshow((X_test[count] + 1) / 2)
    ax.axis('off')
plt.show()
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers). Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers). Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers). Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers). Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers). Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
dataset object.¶We place the training images into tensorflow dataset
# define the necessary params here first for later usage in the training process
# we will set the buffer size to be x100 of the batch size
BATCH_SIZE = 8
BUFFER_SIZE = 800 # this means how random the 1st element between the size of the BUFFER_SIZE.
# batch and shuffle the data
# NOTE: this rebinds train_dataset, shadowing the earlier filtered DataFrame of the same name
train_dataset = tf.data.Dataset.from_tensor_slices(X_train).shuffle(BUFFER_SIZE).batch(BATCH_SIZE)
test_dataset = tf.data.Dataset.from_tensor_slices(X_test).shuffle(BUFFER_SIZE).batch(BATCH_SIZE)
# see the dtype and the shape (it is not called size in tensorflow) of the training and test dataset.
print(train_dataset)
print(test_dataset)
<BatchDataset shapes: (None, 128, 128, 3), types: tf.float32> <BatchDataset shapes: (None, 128, 128, 3), types: tf.float32>
We will now create the generator model for our GAN. We will follow with the original architecture.
# we will first define the parameters to be used later on for the generator
LATENT_DIM = 256 # Dimensionality of the noise vector.
CHANNELS = 3
# create our generator here
def create_generator():
    """Build the DCGAN generator: a LATENT_DIM noise vector -> 128x128x3 tanh image.

    The latent vector is projected and reshaped to an 8x8x1024 feature stack,
    then upsampled by stride-2 transposed convolutions through 16, 32, 64 and
    finally 128 pixels. All weights start from a Gaussian with mean 0 and
    standard deviation 0.02, following the original DCGAN paper.
    """
    model = Sequential(name="generator_model")
    # intialize random gaussian weight with mean 0 and S.D. 0.02
    init = RandomNormal(mean=0.0, stddev=0.02)
    # project the noise vector, then reshape into an 8x8 spatial grid
    model.add(Dense(1024*8*8, activation='relu', use_bias=False, kernel_initializer=init, input_dim=LATENT_DIM))
    model.add(Reshape((8, 8, 1024)))
    assert model.output_shape == (None, 8, 8, 1024)  # Note: None is the batch size
    model.add(BatchNormalization())
    model.add(LeakyReLU(alpha=0.2))
    # three identical upsampling stages: stride-2 transposed convolutions
    # (strides work the same as UpSampling2D here), doubling 8 -> 16 -> 32 -> 64
    for filters, size in ((1024, 16), (512, 32), (256, 64)):
        model.add(Conv2DTranspose(filters, kernel_size=5, strides=(2,2), padding='same', use_bias=False, kernel_initializer=init))
        assert model.output_shape == (None, size, size, filters)  # verify the upsample happened
        model.add(BatchNormalization(momentum=0.9))
        model.add(LeakyReLU(alpha=0.2))
    # final upsample to 128x128 with tanh so outputs land in [-1, 1]
    model.add(Conv2DTranspose(CHANNELS, kernel_size=5, strides=(2,2), padding='same', use_bias=False, kernel_initializer=init, activation='tanh'))
    assert model.output_shape == (None, 128, 128, 3)
    return model
# get the summary of the generator model
generator = create_generator()
generator.summary()
Model: "generator_model" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= dense (Dense) (None, 65536) 16777216 _________________________________________________________________ reshape (Reshape) (None, 8, 8, 1024) 0 _________________________________________________________________ batch_normalization (BatchNo (None, 8, 8, 1024) 4096 _________________________________________________________________ leaky_re_lu (LeakyReLU) (None, 8, 8, 1024) 0 _________________________________________________________________ conv2d_transpose (Conv2DTran (None, 16, 16, 1024) 26214400 _________________________________________________________________ batch_normalization_1 (Batch (None, 16, 16, 1024) 4096 _________________________________________________________________ leaky_re_lu_1 (LeakyReLU) (None, 16, 16, 1024) 0 _________________________________________________________________ conv2d_transpose_1 (Conv2DTr (None, 32, 32, 512) 13107200 _________________________________________________________________ batch_normalization_2 (Batch (None, 32, 32, 512) 2048 _________________________________________________________________ leaky_re_lu_2 (LeakyReLU) (None, 32, 32, 512) 0 _________________________________________________________________ conv2d_transpose_2 (Conv2DTr (None, 64, 64, 256) 3276800 _________________________________________________________________ batch_normalization_3 (Batch (None, 64, 64, 256) 1024 _________________________________________________________________ leaky_re_lu_3 (LeakyReLU) (None, 64, 64, 256) 0 _________________________________________________________________ conv2d_transpose_3 (Conv2DTr (None, 128, 128, 3) 19200 ================================================================= Total params: 59,406,080 Trainable params: 59,400,448 Non-trainable params: 5,632 _________________________________________________________________
Now we will use the generator, which we have yet to feed in any training dataset, to generate an image.
# generate noise and create a random generated image.
noise = tf.random.uniform([4,256], minval=-1, maxval=1)
generated_image = generator(noise, training=False)
# see the dtype of generated_image
type(generated_image)
tensorflow.python.framework.ops.EagerTensor
# plot out the image
fig, axes = plt.subplots(1, len(generated_image), figsize=(20, 5))
for index, ax in enumerate(axes):
    ax.axis('off')
    # the generator's tanh output is in [-1, 1]; rescale to [0, 1] so imshow
    # renders it faithfully instead of clipping negative values
    image_array = (generated_image[index] + 1) / 2
    ax.imshow(image_array)
fig.suptitle("Generated images "+ " ",fontsize=15)
plt.show()
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers). Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers). Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers). Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
We will now create the discriminator model for our GAN. It is almost similar to a standard CNN architecture with some changes.
# create the discriminator model
def create_discriminator():
    """Build the DCGAN discriminator: 128x128x3 image -> sigmoid real/fake probability.

    Four stride-2 convolutions downsample 128 -> 64 -> 32 -> 16 -> 8, then the
    flattened features feed a single sigmoid unit. (Commented-out experimental
    layers from earlier iterations have been removed; the active layer sequence
    is unchanged.)
    """
    model = Sequential(name="discriminator_model")
    # layer 1 (downsample to 64x64); the input shape here must tally with the
    # output shape from the generator. No batch norm on the first layer.
    model.add(Conv2D(128, kernel_size=3, strides=2, padding='same', input_shape=[128, 128, 3]))
    model.add(LeakyReLU(alpha=0.2))
    # layer 2 (downsample to 32x32)
    model.add(Conv2D(256, kernel_size=3, strides=2, padding='same'))
    model.add(BatchNormalization(momentum=0.9))
    model.add(LeakyReLU(alpha=0.2))
    # layer 3 (downsample to 16x16); dropout here to slow the discriminator down
    model.add(Conv2D(512, kernel_size=3, strides=2, padding='same'))
    model.add(BatchNormalization(momentum=0.9))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Dropout(0.5))
    # layer 4 (downsample to 8x8)
    model.add(Conv2D(1024, kernel_size=3, strides=2, padding='same'))
    model.add(BatchNormalization(momentum=0.9))
    model.add(LeakyReLU(alpha=0.2))
    # flatten and classify with a single sigmoid output (real vs. fake)
    model.add(Flatten())
    model.add(Dense(1, activation='sigmoid'))
    return model
# build the discriminator and print its layer-by-layer summary
discriminator = create_discriminator()
discriminator.summary()
Model: "discriminator_model" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= conv2d (Conv2D) (None, 64, 64, 128) 3584 _________________________________________________________________ leaky_re_lu_4 (LeakyReLU) (None, 64, 64, 128) 0 _________________________________________________________________ conv2d_1 (Conv2D) (None, 32, 32, 256) 295168 _________________________________________________________________ batch_normalization_4 (Batch (None, 32, 32, 256) 1024 _________________________________________________________________ leaky_re_lu_5 (LeakyReLU) (None, 32, 32, 256) 0 _________________________________________________________________ conv2d_2 (Conv2D) (None, 16, 16, 512) 1180160 _________________________________________________________________ batch_normalization_5 (Batch (None, 16, 16, 512) 2048 _________________________________________________________________ leaky_re_lu_6 (LeakyReLU) (None, 16, 16, 512) 0 _________________________________________________________________ dropout (Dropout) (None, 16, 16, 512) 0 _________________________________________________________________ conv2d_3 (Conv2D) (None, 8, 8, 1024) 4719616 _________________________________________________________________ batch_normalization_6 (Batch (None, 8, 8, 1024) 4096 _________________________________________________________________ leaky_re_lu_7 (LeakyReLU) (None, 8, 8, 1024) 0 _________________________________________________________________ flatten (Flatten) (None, 65536) 0 _________________________________________________________________ dense_1 (Dense) (None, 1) 65537 ================================================================= Total params: 6,271,233 Trainable params: 6,267,649 Non-trainable params: 3,584 _________________________________________________________________
Now we will use the discriminator, which we have not yet trained on any data, to judge whether an image is real or fake. Because its final layer applies a sigmoid activation, each output is a probability in (0, 1): values above 0.5 lean towards "real" and values below 0.5 towards "fake" — an untrained discriminator therefore outputs values near 0.5.
# feed the generated images into the discriminator and let it decide
decision = discriminator(generated_image)
# sigmoid outputs near 0.5 show the untrained discriminator is maximally unsure
print(decision)
tf.Tensor( [[0.5010095 ] [0.5010159 ] [0.50137734] [0.4999444 ]], shape=(4, 1), dtype=float32)
We will now define the loss function and optimizers for both of the models here for observations later on.
# helper to compute cross entropy loss for both networks.
# BUG FIX: the discriminator's final Dense layer applies a sigmoid
# activation, so its outputs are already probabilities in (0, 1) — the
# printed decisions near 0.5 confirm this. from_logits must therefore be
# False; from_logits=True would apply a second sigmoid internally and
# distort both losses.
cross_entropy = BinaryCrossentropy(from_logits = False)
# generator loss
# quantifies how well the generator fooled the discriminator: a good
# generator makes the discriminator classify fakes as real (label 1), so we
# compare the discriminator's verdict on fakes against an all-ones target.
# (Label smoothing per Salimans et al. 2016 is applied on the discriminator
# side below.)
def generator_loss(fake_output):
    all_real_target = tf.ones_like(fake_output)
    return cross_entropy(all_real_target, fake_output)
# discriminator loss
# quantifies how well the discriminator separates real images from
# generated ones: real predictions are scored against smoothed 1-labels and
# fake predictions against noised 0-labels.
def discriminator_loss(real_output, fake_output):
    # soft "real" labels jittered into [0.9, 1.0) instead of a hard 1.0
    smoothed_real = tf.ones_like(real_output) - 0.1 + (random((tf.ones_like(real_output)).shape) * 0.1)
    real_loss = cross_entropy(smoothed_real, real_output)
    # soft "fake" labels jittered into [0.0, 0.1) instead of a hard 0.0
    noisy_fake = tf.zeros_like(fake_output) + random((tf.zeros_like(fake_output)).shape) * 0.1
    fake_loss = cross_entropy(noisy_fake, fake_output)
    return real_loss + fake_loss
# the discriminator and the generator optimizers are separate since we train the two networks separately.
# Adam extends stochastic gradient descent: instead of one fixed global
# learning rate for all weights, it keeps a per-parameter learning rate that
# adapts as training unfolds.
# lr=0.0002 with beta_1=0.5 follows the DCGAN paper's recommendation
# (Radford et al. 2015), so we count these settings as model optimization.
generator_optimizer = Adam(0.0002, beta_1=0.5)
discriminator_optimizer = Adam(0.0002, beta_1=0.5)
# create checkpoints to store training results so long runs can be resumed
checkpoint_dir = '../content/Deep-Learning_ca2/training_checkpoints/'
checkpoint_prefix = os.path.join(checkpoint_dir, 'ckpt')
# the checkpoint captures both networks AND both optimizer states
checkpoint = tf.train.Checkpoint(generator_optimizer=generator_optimizer,
                                 discriminator_optimizer=discriminator_optimizer,
                                 generator=generator,
                                 discriminator=discriminator)
# save the generator weights separately as a lightweight restore point
generator.save_weights('../content/Deep-Learning_ca2/training_checkpoints/md')
# # load back the weights to resume the model from this point
# # RUN THIS ONLY IF YOU WANT TO RESUME TRAINING
# generator.load_weights('../content/Deep-Learning_ca2/training_checkpoints/md')
We will now define the training loop so that we can fit our data: the train_step function, the generate_and_save_images helper, and the train driver that ties them together.
# training hyper-parameters: epoch count, latent dimension, preview batch size.
# depending if you are training on different days, adjust the epoch respectively
EPOCHS = 5000
noise_dim = 256
num_examples_to_generate = 4
# a fixed latent batch, reused every time preview images are saved so that
# progress is visible on the SAME samples across epochs.
# NOTE(review): this samples tf.random.uniform's default [0, 1) range,
# unlike the [-1, 1) preview earlier — confirm the intended latent range.
# NOISE = tf.random.normal([num_examples_to_generate, noise_dim], mean=0, stddev=1)
NOISE = tf.random.uniform([num_examples_to_generate, noise_dim])
def train_step(images):
    """Run one simultaneous optimisation step for both networks on a batch."""
    # fresh latent vectors for this batch
    latent = tf.random.uniform([BATCH_SIZE, noise_dim])
    # record both forward passes under separate tapes so each network can be
    # differentiated against its own loss
    with tf.GradientTape() as g_tape, tf.GradientTape() as d_tape:
        fakes = generator(latent, training=True)
        real_output = discriminator(images, training=True)
        fake_output = discriminator(fakes, training=True)
        g_loss = generator_loss(fake_output)
        d_loss = discriminator_loss(real_output, fake_output)
    g_grads = g_tape.gradient(g_loss, generator.trainable_variables)
    d_grads = d_tape.gradient(d_loss, discriminator.trainable_variables)
    generator_optimizer.apply_gradients(zip(g_grads, generator.trainable_variables))
    discriminator_optimizer.apply_gradients(zip(d_grads, discriminator.trainable_variables))
    return g_loss, d_loss
def train(dataset, epochs):
    """Main training loop: runs train_step over every batch for `epochs`
    epochs, periodically saving checkpoints and preview images, and returns
    (loss history, final FID score)."""
    # per-epoch record of the discriminator and generator loss
    history = []
    for epoch in range(epochs):
        start = time.time()
        for image_batch in dataset:
            gen_loss, disc_loss = train_step(image_batch)
        # clear output after each epoch so the notebook stays readable
        display.clear_output(wait=True)
        # save a checkpoint and preview images every 200 epochs
        if epoch % 200 == 0:
            generate_and_save_images(generator, epoch, NOISE)
            checkpoint.save(file_prefix = checkpoint_prefix)
        # print out the time elapsed, the generator and discriminator loss
        elapsed = time.time()-start
        print ('Time for epoch {} is {}, GLoss: {}, DLoss: {}'.format(epoch, hms_string(elapsed), gen_loss, disc_loss))
        # the recorded losses are those of the LAST batch of the epoch
        history.append({"Discriminator Loss":disc_loss,"Generator Loss":gen_loss})
    # generate previews one final time after the last epoch
    display.clear_output(wait=True)
    generate_and_save_images(generator, epochs, NOISE)
    # record the final losses once more
    history.append({"Discriminator Loss":disc_loss, "Generator Loss":gen_loss})
    # calculate the FID score at the end to measure image quality
    gan_images = generator(NOISE)
    gan_images = scale_images(gan_images, upscale_shape)
    # NOTE(review): test_img_act is sampled here but never used —
    # calculate_fid receives the full test_imgs_fid instead; confirm intent
    test_img_act = test_imgs_fid[np.random.choice(test_imgs_fid.shape[0], num_examples_to_generate, replace=True), :]
    cur_fid_score = calculate_fid(inception, test_imgs_fid, gan_images)
    return history, cur_fid_score
def generate_and_save_images(model, epoch, test_input):
    """Generate images from the fixed latent batch `test_input`, plot them in
    one row, and save the figure to the epoch_images folder."""
    # `training` is set to False so all layers run in inference mode
    # (batchnorm uses its moving statistics rather than batch statistics)
    predictions = model(test_input, training=False)
    # BUG FIX: the original created an extra `plt.figure(figsize=(4,4))` here
    # that was immediately shadowed by plt.subplots below, leaking an empty
    # figure per call (visible as "<Figure size 288x288 with 0 Axes>").
    # map the generator output to displayable uint8 pixels.
    # NOTE(review): (x + 1) * 127.5 assumes output in [-1, 1] (tanh-style);
    # confirm against the generator's final activation.
    come_out_image = [((sample.numpy() + 1.0) * 127.5).astype(np.uint8) for sample in predictions]
    fig, axes = plt.subplots(1, len(come_out_image), figsize=(20, 5))
    for index, ax in enumerate(axes):
        ax.axis('off')
        ax.imshow(come_out_image[index])
    fig.suptitle("Generated images ",fontsize=15)
    # create the epoch_images folder if it does not exist yet
    if not os.path.exists('../content/Deep-Learning_ca2/epoch_images/'):
        os.makedirs('../content/Deep-Learning_ca2/epoch_images/')
    plt.savefig('../content/Deep-Learning_ca2/epoch_images/image_at_epoch_{:04d}.png'.format(epoch))
    plt.show()
We will now define some functions and make use of the InceptionV3 model so that we gather the FID score of our model.
def scale_images(images, new_shape):
    """Resize every image in `images` to `new_shape` and return them stacked
    as a single numpy array."""
    return asarray([resize(image, new_shape) for image in images])
# we will define how we calculate the FID score here
def calculate_fid(model, act1, images2):
    """Compute the Frechet Inception Distance between a set of reference
    activations (`act1`, already model activations) and a batch of images
    (`images2`, passed through `model` here). Lower is better; 0 means the
    two Gaussians fitted to the activations coincide."""
    act2 = model.predict(images2)
    mu1, sigma1 = act1.mean(axis=0), cov(act1, rowvar=False)
    # BUG FIX: was cov(act1, ...) — the second covariance must be computed
    # from act2, otherwise the generated images' spread never enters the FID
    mu2, sigma2 = act2.mean(axis=0), cov(act2, rowvar=False)
    ssdiff = np.sum((mu1-mu2) ** 2.0)
    covmean = sqrtm(sigma1.dot(sigma2))
    # sqrtm can return spurious imaginary parts from numerical error
    if iscomplexobj(covmean):
        covmean = covmean.real
    # FID = ||mu1 - mu2||^2 + Tr(sigma1 + sigma2 - 2*sqrt(sigma1*sigma2))
    fid = ssdiff + trace(sigma1 + sigma2 - 2.0 * covmean)
    return fid
We will now have to create the InceptionV3 Model so that we can get our FID score. Here, we will also prepare the test images to fit inside.
# InceptionV3 expects 299x299 RGB inputs
upscale_shape = (299, 299, 3)
# headless InceptionV3 with average pooling -> 2048-d activation vectors
inception = InceptionV3(include_top=False, pooling='avg', input_shape=upscale_shape)
if not os.path.exists('../content/Deep-Learning_ca2/test_imgs_fid.npy'):
    test_imgs_fid = []
    # batch size of 6 because our test set only has 40 images (40//6 = 6 batches)
    batch = 6
    for i in tqdm(range(len(X_test)//batch)):
        temp_1 = X_test[i*batch:(i+1)*batch]
        temp_2 = scale_images(temp_1.astype('float32'), upscale_shape)
        test_imgs_fid.append(inception.predict(temp_2))
    # flatten the per-batch predictions into one (n, 2048) activation matrix
    test_imgs_fid = (np.asarray(test_imgs_fid))
    test_imgs_fid = test_imgs_fid.reshape(-1, 2048)
    # cache the activations so later runs can skip the predict step
    np.save('../content/Deep-Learning_ca2/test_imgs_fid.npy', test_imgs_fid)
else:
    # load the cached numpy file if it already exists in the local dir
    test_imgs_fid = np.load('../content/Deep-Learning_ca2/test_imgs_fid.npy')
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5 87916544/87910968 [==============================] - 1s 0us/step
100%|██████████| 10/10 [00:03<00:00, 3.33it/s]
# now train the model (the recorded run took ~4.5h wall-clock)
%%time
history, fid_score = train(train_dataset, EPOCHS)
<Figure size 288x288 with 0 Axes>
CPU times: user 2h 4min 14s, sys: 8min 8s, total: 2h 12min 23s Wall time: 4h 31min 41s
We will plot the generator and discriminator loss graph and observe both model's loss.
# plot the per-epoch loss curves of the discriminator and the generator
hist = pd.DataFrame(history)
plt.figure(figsize=(20,5))
for column in hist.columns:
    plt.plot(hist[column], label=column)
plt.legend()
plt.ylabel("Loss")
plt.xlabel("Epochs")
plt.show()
We will create a GIF so that we can witness the overall process of the DCGAN process.
import imageio
dir_result = '/content/Deep-Learning_ca2/epoch_images/'
def makegif(dir_images):
    """Stitch every .png in `dir_images` (in sorted name order) into a
    single animated image.gif inside the same directory."""
    png_names = [name for name in np.sort(os.listdir(dir_images)) if ".png" in name]
    with imageio.get_writer(dir_images + '/image.gif', mode='I') as writer:
        for png_name in png_names:
            writer.append_data(imageio.imread(dir_images + png_name))
# now build the gif from the saved epoch images
makegif(dir_result)
# open the gif file and display it inline in the notebook
from IPython.display import Image
Image(open('/content/Deep-Learning_ca2/epoch_images/image.gif','rb').read())
Here, we will calculate the Frechet Inception Distance (FID). The FID score is the Wasserstein-2 distance between multivariate Gaussians fitted to the data embedded in the Inception feature space.
# lower FID means the generated distribution is closer to the real one
print('FID Score: ', fid_score)
FID Score: 143.61911064694496